'''
    author: starlee
    @2019.03.07
    利用简单的特征匹配进行推荐
        1. 提取用户的兴趣特征topic
        2. 用topic匹配文章，并过滤
        3. 对文章进行排序
'''
from dbop import conn

class TopicRec:
    """Base recommender that defines the minimal recommendation interface."""

    def __init__(self):
        """Create the recommender; the base class keeps no state."""

    def rec(self, user_id, article_num=10):
        """Recommend up to ``article_num`` articles for ``user_id``.

        The base implementation ignores both arguments and always returns a
        new empty list; subclasses override this method with a real strategy.
        """
        recommendations = []
        return recommendations

class SimpleTopicRec(TopicRec):
    """Recommend articles by matching a user's topics against article topics.

    The pipeline run by :meth:`rec` has four steps, each of which stores its
    result on the instance:

    1. :meth:`user_topic`     -> ``self.u_reads``, ``self.u_topics``
    2. :meth:`match_article`  -> ``self.matched_articles``
    3. :meth:`article_filter` -> prunes ``self.matched_articles``
    4. :meth:`sort_article`   -> ``self.article_score``
    """

    def __init__(self,uid=None):
        """Remember the target user and start with empty pipeline state.

        uid: id of the user to recommend for; may also be supplied later
             through :meth:`rec`.
        """
        super().__init__()
        self.uid = uid
        # Initialise every pipeline attribute up front so that calling a
        # single step out of order works on empty data instead of raising
        # AttributeError.
        self.u_reads = set()          # ids of articles the user has read
        self.u_topics = {}            # {topic: accumulated weight}
        self.matched_articles = {}    # {aid: {"topics": ..., "created_at": ...}}
        self.article_score = []       # [(aid, score)], best first

    def user_topic(self, Top_N = 10):
        """Collect the topics the user is interested in.

        Reads the user's rows from ``"iTags"."reads"``, looks up the ``tags``
        of every read article and sums the tag weights per topic. Stores the
        read article ids in ``self.u_reads`` and the ``Top_N`` heaviest topics
        in ``self.u_topics`` as ``{topic: accumulated weight}``.

        Top_N: maximum number of topics to keep.
        """
        # TODO: a dedicated (streaming) job should maintain the user-topic
        # table instead of recomputing it on every call.
        read_ids = set()
        topic_weight = {}

        cursor = conn.cursor()
        try:
            # Articles the user has read.
            cursor.execute('select aid from "iTags"."reads" where uid=%s',(self.uid,))
            read_rows = cursor.fetchall()

            # A user reading the same article several times is treated as
            # several independent reads, so its topics are counted each time.
            for row in read_rows:
                aid = row[0]
                read_ids.add(aid)
                cursor.execute('select tags from "iTags".articles where id=%s',(aid,))
                tag_row = cursor.fetchone()
                if tag_row is None or tag_row[0] is None:
                    # Article not found, or it has NULL tags.
                    continue
                # ``tags`` is used as a {topic: weight} mapping.
                for topic, weight in tag_row[0].items():
                    topic_weight[topic] = topic_weight.get(topic, 0) + weight
        finally:
            # Release the cursor even when a query fails.
            cursor.close()

        self.u_reads = read_ids
        heaviest = sorted(topic_weight.items(), key=lambda kv: kv[1], reverse=True)
        self.u_topics = dict(heaviest[:Top_N])

    def __topic_match_score(self, tlst1, tlst2):
        """Return the similarity score of two topic mappings.

        tlst1: {topic_name: frequency}
        tlst2: {topic_name: weight}

        The score is the sum of ``frequency * weight`` over the topics present
        in both mappings; it is 0 when either mapping is empty or ``None``.
        """
        if not tlst1 or not tlst2:
            return 0
        match_score = 0
        for t_name, freq in tlst1.items():
            if t_name in tlst2:
                match_score += freq * tlst2[t_name]
        return match_score

    def sort_article(self):
        """Rank the matched articles.

        Stores ``self.article_score`` as a list of ``(aid, score)`` tuples
        sorted by descending score, where the score is the topic similarity
        between ``self.u_topics`` and the article's topics.
        """
        # Only topic similarity is scored for now; other article attributes
        # can be folded into the score here later.
        scored = [
            (aid, self.__topic_match_score(self.u_topics, info["topics"]))
            for aid, info in self.matched_articles.items()
        ]
        self.article_score = sorted(scored, key=lambda pair: pair[1], reverse=True)

    def article_filter(self):
        """Filter the matched articles in place.

        Currently this only removes articles the user has already read.
        """
        for read_aid in self.u_reads:
            # Read articles that were never matched are simply skipped.
            self.matched_articles.pop(read_aid, None)

        # Further filters can be added here.

    def match_article(self,weight_lim = 0.1):
        """Collect the articles tagged with any of the user's topics.

        For every topic in ``self.u_topics`` this selects the articles whose
        ``tags`` satisfy ``tags ? topic`` and stores them in
        ``self.matched_articles`` as
        ``{aid: {"topics": tags, "created_at": created_at}}``.

        weight_lim: intended minimum topic weight for an article to match.
            NOTE(review): this threshold is currently NOT applied - every
            article carrying the topic is matched regardless of its weight.
        """
        # TODO: a dedicated article-topic table should back this lookup.
        # Only topic similarity and creation time are collected for now; read
        # counts, comment counts (and sentiment) may be added later.
        matched = {}
        cursor = conn.cursor()
        try:
            for topic in self.u_topics:
                cursor.execute('select id, tags, created_at from "iTags".articles where tags?%s',(topic,))
                for aid, a_topics, a_time in cursor.fetchall():
                    # Keyed by article id, so an article matched by several
                    # topics is stored only once.
                    matched[aid] = {"topics": a_topics, "created_at": a_time}
        finally:
            # Release the cursor even when a query fails.
            cursor.close()
        self.matched_articles = matched

    def rec(self, uid, article_num=10):
        """Recommend up to ``article_num`` articles for user ``uid``.

        Runs the full pipeline and returns a list of ``(aid, score)`` tuples,
        best match first.
        """
        self.uid = uid

        # Step 1: extract the user's topic profile.
        self.user_topic()

        # Step 2: match articles against that profile.
        self.match_article()

        # Step 3: filter the matched articles.
        self.article_filter()

        # Step 4: rank what is left.
        self.sort_article()

        return self.article_score[:article_num]

if __name__ == "__main__":
    # Manual smoke test: runs the recommendation pipeline step by step for a
    # single user, printing how long each step takes and what it produced.
    # The one-call equivalent is ``SimpleTopicRec().rec(user_id)``.
    import time

    user_id = 15

    def _tag_name(cur, tag_id):
        """Return the name stored for ``tag_id`` in "iTags".tags, or None."""
        cur.execute('select name from "iTags".tags where id=%s',(tag_id,))
        row = cur.fetchone()
        if row is None:
            return None
        return row[0]

    # perf_counter is monotonic, so the measured intervals cannot be skewed
    # by wall-clock adjustments.
    start_time = time.perf_counter()
    cursor = conn.cursor()
    try:
        smptpRec = SimpleTopicRec(user_id)

        # Step 1: the user's topics, printed as "<tag name> <weight>".
        smptpRec.user_topic()
        print("user_topic: ",time.perf_counter()-start_time)
        for topic_id, weight in smptpRec.u_topics.items():
            name = _tag_name(cursor, topic_id)
            if name is None:
                continue
            print(name, weight)
        print("*"*20)

        # Steps 2-4: match, filter and rank, timing each step separately.
        start_time = time.perf_counter()
        smptpRec.match_article()
        print("match_article: ",time.perf_counter()-start_time)

        start_time = time.perf_counter()
        smptpRec.article_filter()
        print("filter_article: ",time.perf_counter()-start_time)

        start_time = time.perf_counter()
        smptpRec.sort_article()
        print("sort_article: ",time.perf_counter()-start_time)

        # Show the 20 best articles with their tag names, title and creation
        # time.
        for aid, a_score in smptpRec.article_score[:20]:
            cursor.execute('select title, tags, created_at from "iTags".articles where id=%s',(aid,))
            a_info = cursor.fetchone()
            if a_info is None:
                continue
            a_tags = []
            # NULL tags are treated as "no tags" instead of crashing.
            for tid in (a_info[1] or {}):
                name = _tag_name(cursor, tid)
                if name is None:
                    continue
                a_tags.append(name)

            print(aid, "、".join(a_tags)," >> ", a_info[0]," >> ", a_info[2])
    finally:
        # Release the cursor even when one of the steps fails.
        cursor.close()